Introduction

This report summarizes the analysis, including descriptive statistics, ANOVA, Response Surface Modeling, and Cross-Validation. It also provides various visualizations to interpret the results effectively.

1. Load Libraries
# Ensure that a package is installed, installing it when it is missing.
#
# Args:
#   package: Name of the package, as a single character string.
#
# Returns:
#   NULL, invisibly. The function is called for its side effect.
#
# Stops with an error when the package still cannot be found after the
# installation attempt. install.packages() reports failures only as
# warnings, so without this re-check a failed install would pass silently.
check_and_install <- function(package) {
  # Nothing to do when the namespace can already be loaded.
  if (requireNamespace(package, quietly = TRUE)) {
    return(invisible(NULL))
  }

  install.packages(package, dependencies = TRUE)

  # Verify the outcome instead of trusting the installation attempt.
  if (!requireNamespace(package, quietly = TRUE)) {
    stop("Package '", package, "' could not be installed.", call. = FALSE)
  }

  invisible(NULL)
}

# Packages used by this report.
required_packages <- c(
  "dplyr", "ggplot2", "car", "rsm", "caret", "gridExtra", "plotly"
)

# Run the install-if-missing helper once per required package.
for (pkg in required_packages) {
  check_and_install(package = pkg)
}

library(dplyr)
library(ggplot2)
library(car)
library(rsm)
library(caret)
library(gridExtra)
library(plotly)
# Status message printed after the library() calls above; this line performs
# no check of its own.
print("All required libraries are installed and loaded.")
## [1] "All required libraries are installed and loaded."

2. Load Dataset

# Read the dataset from the CSV file into the data frame `data`.
data <- read.csv(file = "~/Downloads/dataupdate.csv")

# Announce the preview, then show the first six rows.
print("Dataset Preview:")
## [1] "Dataset Preview:"
head(data, n = 6L)
##   Run Bacteria Cementation  pH Zeolite comactive_effort consolidation
## 1   1 2.24e+09        0.75 6.5      10           2693.3         0.114
## 2   2 3.50e+09        1.00 3.5       6           2693.3         0.089
## 3   3 3.50e+09        0.50 3.5      10           1009.2         0.116
## 4   4 1.50e+09        1.00 3.5       6           1009.2         0.128
## 5   5 2.24e+09        0.75 6.5      10            593.3         0.075
## 6   6 3.50e+09        0.75 9.5       6           1009.2         0.092
##   permeability shear_strength X.Erodibility calcite_form Ammonia_Conc
## 1      8.6e-06          18.92         39.03         3.85     3.86e-06
## 2      5.8e-06          16.70         35.82         2.00     3.04e-06
## 3      7.1e-06          22.14         24.56         7.03     1.39e-06
## 4      7.5e-06          31.21         16.59         5.65     3.86e-06
## 5      4.9e-06          17.79         45.13         5.43     4.68e-06
## 6      4.1e-06           6.71         34.77         8.75     2.22e-06

3. Descriptive Statistics

# One-row table holding the mean, standard deviation, median, minimum and
# maximum of every numeric column; result columns are named <column>_<stat>.
summary_stats <- summarise(
  data,
  across(
    where(is.numeric),
    list(mean = mean, sd = sd, median = median, min = min, max = max)
  )
)

summary_stats
##   Run_mean   Run_sd Run_median Run_min Run_max Bacteria_mean Bacteria_sd
## 1       16 9.092121         16       1      31    2512903226   849918402
##   Bacteria_median Bacteria_min Bacteria_max Cementation_mean Cementation_sd
## 1        2.24e+09      1.5e+09      3.5e+09         0.733871      0.2134585
##   Cementation_median Cementation_min Cementation_max pH_mean    pH_sd pH_median
## 1               0.75             0.5               1     6.5 2.569047       6.5
##   pH_min pH_max Zeolite_mean Zeolite_sd Zeolite_median Zeolite_min Zeolite_max
## 1    3.5    9.5     6.387097   3.480205              6           2          10
##   comactive_effort_mean comactive_effort_sd comactive_effort_median
## 1              1431.713            901.5663                  1009.2
##   comactive_effort_min comactive_effort_max consolidation_mean consolidation_sd
## 1                593.3               2693.3          0.1061935       0.04230557
##   consolidation_median consolidation_min consolidation_max permeability_mean
## 1                0.114             0.035             0.188      8.787419e-06
##   permeability_sd permeability_median permeability_min permeability_max
## 1    5.737605e-06             7.5e-06         1.21e-06         2.91e-05
##   shear_strength_mean shear_strength_sd shear_strength_median
## 1            16.91194          8.166283                 17.02
##   shear_strength_min shear_strength_max X.Erodibility_mean X.Erodibility_sd
## 1               6.11               39.7           29.82323         14.93195
##   X.Erodibility_median X.Erodibility_min X.Erodibility_max calcite_form_mean
## 1                25.04              5.49             67.62          4.675161
##   calcite_form_sd calcite_form_median calcite_form_min calcite_form_max
## 1        2.043531                4.18             1.78             8.75
##   Ammonia_Conc_mean Ammonia_Conc_sd Ammonia_Conc_median Ammonia_Conc_min
## 1      2.668226e-06    1.120157e-06            2.22e-06         5.75e-07
##   Ammonia_Conc_max
## 1         5.51e-06

4. ANOVA Analysis

# Fit consolidation against the five process variables, entered as main
# effects only, and keep the ANOVA table for display.
# NOTE(review): aov() tests terms sequentially, so the table can depend on
# the order of the terms when the design is not orthogonal; confirm that
# this is intended (car is loaded, but none of its functions is called in
# the code shown).
anova_model <- aov(
  formula = consolidation ~ Bacteria + Cementation + pH + Zeolite +
    comactive_effort,
  data = data
)
anova_summary <- summary(object = anova_model)
anova_summary
##                  Df  Sum Sq  Mean Sq F value Pr(>F)
## Bacteria          1 0.00047 0.000471   0.247  0.623
## Cementation       1 0.00326 0.003256   1.712  0.203
## pH                1 0.00147 0.001467   0.771  0.388
## Zeolite           1 0.00082 0.000820   0.431  0.517
## comactive_effort  1 0.00013 0.000134   0.070  0.793
## Residuals        25 0.04755 0.001902

5. Response Surface Methodology

# Fit a first-order (FO) response-surface model of consolidation on the five
# process variables and keep its summary for display.
# NOTE(review): the predictors enter in their raw units (no coding step is
# visible here), and their magnitudes differ widely (Bacteria is of order
# 1e9, Cementation of order 1). The steepest-ascent direction reported by the
# summary depends on those scales; confirm whether coded variables were
# intended.
rsm_model <- rsm(consolidation ~ FO(Bacteria, Cementation, pH, Zeolite, comactive_effort), data = data)
rsm_summary <- summary(rsm_model)
rsm_summary
## 
## Call:
## rsm(formula = consolidation ~ FO(Bacteria, Cementation, pH, Zeolite, 
##     comactive_effort), data = data)
## 
##                     Estimate  Std. Error t value Pr(>|t|)  
## (Intercept)       1.1218e-01  4.4903e-02  2.4982  0.01942 *
## Bacteria         -4.9114e-12  9.6152e-12 -0.5108  0.61397  
## Cementation       4.8909e-02  3.7783e-02  1.2945  0.20733  
## pH               -2.5182e-03  3.1594e-03 -0.7971  0.43292  
## Zeolite          -1.5213e-03  2.3091e-03 -0.6588  0.51602  
## comactive_effort -2.4105e-06  9.0923e-06 -0.2651  0.79310  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Multiple R-squared:  0.1145, Adjusted R-squared:  -0.06262 
## F-statistic: 0.6464 on 5 and 25 DF,  p-value: 0.6667
## 
## Analysis of Variance Table
## 
## Response: consolidation
##                                                          Df   Sum Sq   Mean Sq
## FO(Bacteria, Cementation, pH, Zeolite, comactive_effort)  5 0.006147 0.0012294
## Residuals                                                25 0.047546 0.0019018
## Lack of fit                                              20 0.042099 0.0021049
## Pure error                                                5 0.005447 0.0010894
##                                                          F value Pr(>F)
## FO(Bacteria, Cementation, pH, Zeolite, comactive_effort)  0.6464 0.6667
## Residuals                                                              
## Lack of fit                                               1.9322 0.2400
## Pure error                                                             
## 
## Direction of steepest ascent (at radius 1):
##         Bacteria      Cementation               pH          Zeolite 
##    -1.002375e-10     9.981957e-01    -5.139420e-02    -3.104930e-02 
## comactive_effort 
##    -4.919534e-05 
## 
## Corresponding increment in original units:
##         Bacteria      Cementation               pH          Zeolite 
##    -1.002375e-10     9.981957e-01    -5.139420e-02    -3.104930e-02 
## comactive_effort 
##    -4.919534e-05

6. Cross-Validation

# Fix the random seed so that the random assignment of rows to folds, and
# therefore cv_predictions and the plots built from it, is the same on every
# run of the report.
set.seed(123)

# 10-fold cross-validation, keeping the predictions made by the final model
# configuration.
control <- trainControl(method = "cv", number = 10, savePredictions = "final")

# Linear model of consolidation on the five process variables, evaluated with
# the resampling scheme defined above.
cv_model <- train(
  consolidation ~ Bacteria + Cementation + pH + Zeolite + comactive_effort,
  data = data,
  method = "lm",
  trControl = control
)

# Predictions saved during resampling (columns obs and pred are used by the
# plots below).
cv_predictions <- cv_model$pred

# Scatter of the cross-validation predictions against the observed values,
# with a straight line fitted through the points.
ggplot(data = cv_predictions, mapping = aes(x = obs, y = pred)) +
  geom_point(colour = "blue") +
  geom_smooth(method = "lm", se = FALSE, colour = "red") +
  ggtitle("Cross-Validation: Observed vs Predicted") +
  xlab("Observed Values") +
  ylab("Predicted Values") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

7. Visualizations

Scatterplot Matrix

# Pairwise scatterplots of every numeric column of the dataset.
pairs(
  select(data, where(is.numeric)),
  main = "Scatterplot Matrix"
)

Boxplot

# Distribution of consolidation at each distinct Bacteria value; Bacteria is
# converted to a factor so that every value gets its own box.
boxplot_plot <- ggplot(
  data = data,
  mapping = aes(x = as.factor(Bacteria), y = consolidation)
) +
  geom_boxplot() +
  ggtitle("Boxplot of Consolidation by Bacteria") +
  xlab("Bacteria") +
  ylab("Consolidation") +
  theme_minimal()
boxplot_plot

Contour Plot

# Contours of the fitted response-surface model over Bacteria and Cementation.
contour(
  rsm_model,
  form = ~ Bacteria + Cementation,
  main = "Response Surface: Bacteria and Cementation"
)

Interaction Plot

# Mean consolidation at each Bacteria value, with one trace per Cementation
# value.
# trace.label is set explicitly: its default is the deparsed trace.factor
# expression, which would title the legend "data$Cementation".
interaction.plot(
  x.factor = data$Bacteria,
  trace.factor = data$Cementation,
  response = data$consolidation,
  main = "Interaction Plot: Bacteria and Cementation",
  xlab = "Bacteria",
  ylab = "Consolidation",
  trace.label = "Cementation"
)

Combined Visualizations

# Show the boxplot and the cross-validation scatter side by side in two
# columns.
grid.arrange(
  boxplot_plot,
  ggplot(data = cv_predictions, mapping = aes(x = obs, y = pred)) +
    geom_point(colour = "blue") +
    geom_smooth(method = "lm", se = FALSE, colour = "red") +
    ggtitle("Cross-Validation: Observed vs Predicted") +
    xlab("Observed Values") +
    ylab("Predicted Values") +
    theme_minimal(),
  ncol = 2
)
## `geom_smooth()` using formula = 'y ~ x'

# Interactive 3D scatter: Bacteria (x), Cementation (y) and consolidation (z),
# with the markers also coloured by consolidation.
fig <- plot_ly(
  data = data,
  x = ~Bacteria,
  y = ~Cementation,
  z = ~consolidation,
  type = "scatter3d",
  mode = "markers",
  marker = list(
    size = 5,
    color = ~consolidation,
    colorscale = "Viridis",
    showscale = TRUE
  )
)

# Add the figure title and the axis titles.
fig <- layout(
  fig,
  title = "3D Visualization: Bacteria, Cementation, and Consolidation",
  scene = list(
    xaxis = list(title = "Bacteria"),
    yaxis = list(title = "Cementation"),
    zaxis = list(title = "Consolidation")
  )
)

fig